1 Introduction

This R Markdown script analyses data from the PAL (probabilistic associative learning) task of the EMBA project using a drift diffusion model implemented in ggdmc.

1.1 Some general settings

# Number of posterior predictive simulations drawn later in the script
nsim <- 250

# Sampler settings for the brms models: total iterations and warmup per chain
iter <- 3000
warm <- 1000

# Fix the RNG seed so all simulations are reproducible
set.seed(2468)

1.2 Package versions

The following packages are used in this RMarkdown file:

## [1] "R version 4.5.1 (2025-06-13)"
## [1] "knitr version 1.50"
## [1] "brms version 2.22.0"
## [1] "ggdmc version 0.2.6.2"
## [1] "tidyverse version 2.0.0"
## [1] "sjPlot version 2.9.0"
## [1] "ggpubr version 0.6.1"
## [1] "ggrain version 0.0.4"
## [1] "rstatix version 0.7.2"
## [1] "easystats version 0.7.5"
## [1] "BayesFactor version 0.9.12.4.7"
## [1] "bayesplot version 1.13.0"
## [1] "bayestestR version 0.17.0"

2 Model setup

# build the model: drift rate v varies by phase (factor P); every other
# parameter is constant across conditions ("1"); type "rd" = Ratcliff diffusion
model = BuildModel(
  p.map     = list(a = "1", v = "P", z = "1", d = "1", sz = "1", sv = "1",
                   t0 = "1", st0 = "1"),
  # M maps each stimulus to its matching (correct) response
  match.map = list(M = list(s1 = "r1", s2 = "r2")),
  factors   = list(S = c("s1", "s2"), P = c("pre", "vol", "post")),
  # fix drift offset and the inter-trial variability parameters to zero
  constants = c(st0 = 0, d = 0, sz = 0, sv = 0),
  responses = c("r1", "r2"),
  type      = "rd")
## 
## Parameter vector names are: ( see attr(,"p.vector") )
## [1] "a"      "v.pre"  "v.vol"  "v.post" "z"      "t0"    
## 
## Constants are (see attr(,"constants") ):
## st0   d  sz  sv 
##   0   0   0   0 
## 
## Model type = rd
# number of free parameters (a, v.pre, v.vol, v.post, z, t0) and phases
npar = length(GetPNames(model))
nop = 3   # number of phases

# Population-level prior locations and scales for each free parameter
pop.mean  <- c(a = 2.0, v.pre = 2.5, v.vol = 2.0, v.post = 2.0, z = 0.5, t0 = 0.3)
pop.scale <- c(a = 0.5, v.pre = 0.5, v.vol = 0.5, v.post = 0.5, z = 0.1, t0 = 0.05)
# Bounds: a in [0, 5]; the nop drift rates in [-5, 7]; remaining parameters in [0, 1]
pop.lower <- c(0, rep(-5, nop), rep(0, npar - 1 - nop))
pop.upper <- c(5, rep( 7, nop), rep(1, npar - 1 - nop))

# Truncated-normal priors, deliberately wide (scale * 5) for fitting
p.prior <- BuildPrior(
  dists = rep("tnorm", npar),
  p1    = pop.mean,
  p2    = pop.scale*5,
  lower = pop.lower,
  upper = pop.upper)
mu.prior <- p.prior
plot(p.prior)

# Flat beta(1, 1) priors, stretched to [0, 2], for the group-level scales
sigma.prior <- BuildPrior(
  dists = rep("beta", npar),
  p1    = rep(1, npar),
  p2    = rep(1, npar),
  upper = rep(2, npar))
names(sigma.prior) <- GetPNames(model)

# Bundle the priors in the structure the hierarchical sampler expects
priors <- list(pprior = p.prior, location = mu.prior, scale = sigma.prior)

3 Parameter recovery

# Run the parameter-recovery study once and cache the result on disk
if (file.exists(file.path(ddm_dir, "fit_rec.RData"))) {
  # a cached fit exists -- restore fit.rec, sim.dat and dmi.rec
  load(file.path(ddm_dir, "fit_rec.RData"))
} else {

  # simulation layout
  not <- 336 # trials per subject
  nos <- 22  # subjects per group
  npt <- 72  # min trials per phase

  # narrower priors (the raw population scale, not inflated) to generate data
  pop.prior <- BuildPrior(
    dists = rep("tnorm", npar),
    p1    = pop.mean,
    p2    = pop.scale,
    lower = pop.lower,
    upper = pop.upper)

  # simulate subjects and wrap them into a data-model instance
  sim.dat <- simulate(model, prior = pop.prior, nsim = npt, nsub = nos)
  dmi.rec <- BuildDMI(sim.dat, model)

  # initial sampling, then a longer thinned run to stabilise the chains
  fit.rec0 <- StartNewsamples(data = dmi.rec, prior = priors)
  fit.rec  <- run(fit.rec0, thin = 16, nmc = 1000)

  save(fit.rec, sim.dat, dmi.rec, file = file.path(ddm_dir, "fit_rec.RData"))
}
# create some plots to check the model fit of the recovery study
# (chain/density views of the hyper- and subject-level posteriors)
p1 = plot(fit.rec, hyper = TRUE) + theme_bw() + theme(legend.position = "none")
p2 = plot(fit.rec, hyper = TRUE, pll = FALSE) + theme_bw() + theme(legend.position = "none")
p3 = plot(fit.rec) + theme_bw() + theme(legend.position = "none")
p4 = plot(fit.rec, hyper = TRUE, pll = FALSE, den = TRUE) + theme_bw() + theme(legend.position = "none")
# now plot them all together
ggarrange(p1, p2, p3, p4, ncol = 1, heights = c(0.25, 0.75, 1.2, 0.75))

# check the rhats (values near 1 indicate converged chains)
rhats = hgelman(fit.rec, verbose = TRUE)
## hyper    12    19     7    14     2    22    15    10     8     1     6    21 
##  1.07  1.00  1.00  1.00  1.00  1.00  1.00  1.00  1.00  1.00  1.00  1.00  1.00 
##    11     4     3     9    18    20     5    17    13    16 
##  1.00  1.00  1.00  1.00  1.00  1.00  1.00  1.00  1.00  1.00
# compare estimates and real values
# ps holds the subject-level parameters used to generate the simulated data
ps    = attr(sim.dat, "parameters")
# hyper-level recovery: compare against the true location (pop.mean)
# and scale (pop.scale) values used for the simulation
hest1 = summary(fit.rec, hyper = T, type = 1, recovery = T, ps = pop.mean)
hest2 = summary(fit.rec, hyper = T, type = 2, recovery = T, ps = pop.scale)
round(hest1, 2)
##                    a   t0 v.post v.pre v.vol     z
## True            2.00 0.30   2.00  2.50  2.00  0.50
## 2.5% Estimate   1.71 0.29   1.90  2.22  1.74  0.43
## 50% Estimate    1.91 0.31   2.09  2.45  1.97  0.48
## 97.5% Estimate  2.12 0.33   2.27  2.67  2.19  0.53
## Median-True    -0.09 0.01   0.09 -0.05 -0.03 -0.02
round(hest2, 2)
##                    a   t0 v.post v.pre v.vol    z
## True            0.50 0.05   0.50  0.50  0.50 0.10
## 2.5% Estimate   0.34 0.04   0.29  0.36  0.36 0.08
## 50% Estimate    0.46 0.05   0.40  0.50  0.49 0.11
## 97.5% Estimate  0.66 0.07   0.59  0.72  0.71 0.16
## Median-True    -0.04 0.00  -0.10  0.00 -0.01 0.01
# subject-level recovery against the per-subject generating parameters
summary(fit.rec, recovery = T, ps = ps)
##          a v.pre v.vol v.post    z   t0
## Mean  1.91  2.45  1.97   2.09 0.48 0.31
## True  1.89  2.39  1.97   2.07 0.47 0.31
## Diff -0.02 -0.06  0.00  -0.02 0.00 0.00
## Sd    0.43  0.45  0.45   0.36 0.10 0.05
## True  0.43  0.51  0.45   0.42 0.10 0.05
## Diff  0.00  0.05 -0.01   0.05 0.00 0.00
##                 a       v.pre        v.vol      v.post            z
## Mean  1.913892482  2.44614272  1.967750646  2.08692429  0.476463946
## True  1.894501506  2.39001639  1.969195572  2.06762211  0.473922741
## Diff -0.019390976 -0.05612633  0.001444926 -0.01930218 -0.002541205
## Sd    0.433868645  0.45379187  0.450509727  0.36397404  0.102637469
## True  0.432998562  0.50552409  0.445240317  0.41895476  0.101584369
## Diff -0.000870083  0.05173223 -0.005269410  0.05498072 -0.001053100
##                t0
## Mean  0.308410419
## True  0.310418194
## Diff  0.002007775
## Sd    0.047261481
## True  0.046147437
## Diff -0.001114044

All looks good! Thus, we can move on to fitting the actual data.

4 Hierarchical model

We compute a hierarchical model separately for our three groups.

# Fit a hierarchical DDM separately per diagnosis group; cache everything so
# the expensive sampling only happens once.
# check if it all already exists
if (!file.exists(file.path(ddm_dir, "fit_hddm.RData"))) {
  
  # load in the data and recode it into the ggdmc design columns
  df = read_csv("../data/PAL-ADHD_data.csv", show_col_types = F) %>%
    # filter out super long and super fast reactions (rt assumed in ms -- TODO confirm)
    filter(rt > 100 & rt < 1500) %>%
    # add all the trial information
    merge(., read_csv("../data/PAL_scheme.csv", show_col_types = F)) %>%
    mutate(
      # r1 = the "positive" response: correct on positive trials, error on negative ones
      R = if_else((acc & emo == "positive") | (!acc & emo == "negative"), 
                  "r1", "r2"),
      P = case_match(phase, "prevolatile" ~ "pre", "volatile" ~ "vol", "postvolatile" ~ "post"),
      S = if_else(emo == "positive", "s1", "s2"),
      s = as.factor(subID),
      RT = rt/1000  # seconds, as the DDM expects
    ) %>% select(diagnosis, adhd.meds.bin, s, S, P, R, RT)
  
  # initialise output list and dataframe
  m       = list()
  df.part = data.frame()
  
  groups = unique(df$diagnosis)
  
  for (group in groups) {
    
    if (!file.exists(file.path(ddm_dir, sprintf("m_%s.rds", group)))) {
      # select part of the data
      df.grp = df %>% filter(diagnosis == group) %>% 
        select(-diagnosis) %>% droplevels()
      
      # create the data model instance 
      dmi = BuildDMI(df.grp, model)
      
      # resample until the largest rhat is acceptable
      # fix: 'thin' was previously referenced in the progress message without
      # ever being defined, which crashed the loop; define it once here and
      # use it in run() as well
      thin  = 32
      rhat  = 5    # sentinel so the loop runs at least once
      count = 0
      
      while (rhat >= 1.1) {
        
        # increase counter and report progress
        count = count + 1
        print(sprintf("%d, %d: %s", count, thin, group))
        
        # start sampling
        fit0 = StartNewsamples(data = dmi, prior = priors)
        
        # run more to get it to be stable
        fit = run(fit0, thin = thin, nmc = 1000) # ~33min
  
        # check the rhats; loop again if any chain is still above 1.1
        rhats = hgelman(fit, verbose = TRUE)
        rhat  = max(rhats)
        
      }
      
    } else {
      # if it already exists, just load the fit
      fit = readRDS(file.path(ddm_dir, sprintf("m_%s.rds", group)))
    }
    
    # if rhats are fine, then save the "ultimate" fit
    saveRDS(fit, file.path(ddm_dir, sprintf("m_%s.rds", group)))
    
    # get the output
    summary(fit, hyper = T, hci = T) # location and scale for hypers
    part = summary(fit, hyper = F)   # participant parameters
    # collect participant-level estimates, dropping the "Mean" summary row
    df.part = rbind(df.part, 
                    as.data.frame(part) %>% rownames_to_column(var = "s") %>% filter(s != "Mean"))
    
    # add to list
    m[[group]] = fit
    
  }
  
  save(m, df, df.part, 
       file = file.path(ddm_dir, "fit_hddm.RData"))
  
} else {
  load(file.path(ddm_dir, "fit_hddm.RData"))
}
# Diagnostic plots for each group's hierarchical fit.
# fix: the loop previously plotted fit.rec (the recovery fit) on every
# iteration instead of the group fit m[[i]], so all three panels were
# identical and never showed the real-data fits.
p = list()
for (i in seq_along(m)) {
  # create some plots to check the model
  p1 = plot(m[[i]], hyper = TRUE) + theme_bw() + theme(legend.position = "none")
  p2 = plot(m[[i]], hyper = TRUE, pll = FALSE) + theme_bw() + theme(legend.position = "none")
  p3 = plot(m[[i]]) + theme_bw() + theme(legend.position = "none")
  p4 = plot(m[[i]], hyper = TRUE, pll = FALSE, den = TRUE) + theme_bw() + theme(legend.position = "none")
  p[[i]]  = ggarrange(p1, p2, p3, p4, ncol = 1, heights = c(0.25, 0.75, 1.2, 0.75))
}
# display the diagnostic panel of each group as its own titled figure
annotate_figure(p[[1]], 
                top = text_grob(sprintf("Model checks: %s", names(m)[1]), 
                                face = "bold", size = 14))

annotate_figure(p[[2]], 
                top = text_grob(sprintf("Model checks: %s", names(m)[2]), 
                                face = "bold", size = 14))

annotate_figure(p[[3]], 
                top = text_grob(sprintf("Model checks: %s", names(m)[3]), 
                                face = "bold", size = 14))

5 Analysis of drift rate

5.1 Model setup

# add diagnosis 
df.part = merge(df.part, df %>% select(s, diagnosis, adhd.meds.bin) %>% distinct())

# convert to long format for drift rates (one row per participant x phase)
df.lng = df.part %>%
  pivot_longer(cols = starts_with("v."), values_to = "v") %>%
  mutate(
    phase = case_match(name, 
                       "v.pre"  ~ "prevolatile",
                       "v.vol"  ~ "volatile",
                       "v.post" ~ "postvolatile"),
    phase = factor(phase, levels = c("prevolatile", "volatile", "postvolatile"))
  ) %>%
  mutate_if(is.character, as.factor)

# set and print the contrasts (sum-to-zero coding: the last level is the
# negative sum of the displayed columns)
contrasts(df.lng$diagnosis) = contr.sum(3)
contrasts(df.lng$diagnosis)
##      [,1] [,2]
## ADHD    1    0
## BOTH    0    1
## COMP   -1   -1
contrasts(df.lng$phase) = contr.sum(3)
contrasts(df.lng$phase)
##              [,1] [,2]
## prevolatile     1    0
## volatile        0    1
## postvolatile   -1   -1
# set the formula: drift rate by diagnosis x phase with a participant intercept
f.v = brms::bf(v ~ diagnosis * phase + (1|s))

# set weakly informative priors
# NOTE(review): this reuses the name 'priors' and shadows the ggdmc prior
# list built earlier; rename one of them if the ggdmc sections ever need
# to be re-run after this point
priors = c(
  prior(normal(2, 1.50),  class = Intercept),
  prior(normal(0, 0.50),  class = sigma),
  prior(normal(0, 0.50),  class = sd),
  prior(normal(0, 0.50),  class = b)
)

5.2 Posterior predictive checks

As the next step, we fit the model, check whether there are divergences or rhat issues, and then check whether the chains have converged.

# fit the final model ('file =' caches the fit; reruns load it from disk)
m.v = brm(f.v, seed = 1234,
            df.lng, prior = priors,
            iter = iter, warmup = warm,
            backend = "cmdstanr", threads = threading(2),
            file = file.path(brms_dir, "m_ddm_v"),
            save_pars = save_pars(all = TRUE)
            )
# sampler diagnostics: divergences, tree depth saturation, E-BFMI
rstan::check_hmc_diagnostics(m.v$fit)
## 
## Divergences:
## 0 of 8000 iterations ended with a divergence.
## 
## Tree depth:
## 0 of 8000 iterations saturated the maximum tree depth of 10.
## 
## Energy:
## E-BFMI indicated no pathological behavior.
# check that rhats are below 1.01
sum(brms::rhat(m.v) >= 1.01, na.rm = T)
## [1] 0
# check the trace plots for the regression coefficients
post.draws = as_draws_df(m.v)
mcmc_trace(post.draws, regex_pars = "^b_",
           facet_args = list(ncol = 3)) +
  scale_x_continuous(breaks=scales::pretty_breaks(n = 3)) +
  scale_y_continuous(breaks=scales::pretty_breaks(n = 3))
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.

This model shows no pathological E-BFMI behaviour, no divergent samples, and no rhats greater than or equal to 1.01. Therefore, we go ahead and perform our posterior predictive checks.

# get posterior predictions (nsim draws from the posterior predictive)
post.pred = posterior_predict(m.v, ndraws = nsim)

# check the fit of the predicted data compared to the real data
p1 = pp_check(m.v, ndraws = nsim) + 
  theme_bw() + theme(legend.position = "none")

# distributions of means compared to the real values per group
p2 = ppc_stat_grouped(df.lng$v, post.pred, df.lng$diagnosis) + 
  theme_bw() + theme(legend.position = "none")

# ... and per phase
p3 = ppc_stat_grouped(df.lng$v, post.pred, df.lng$phase) + 
  theme_bw() + theme(legend.position = "none")

p = ggarrange(p1, p2, p3,
          ncol = 1, labels = "AUTO")
annotate_figure(p, top = text_grob("Posterior predictive checks", 
                                   face = "bold", size = 14))

5.3 Inferences

Now that we are convinced that we can trust our model, we have a look at its estimates and use the hypothesis function to assess our hypotheses and perform exploratory tests.

# print a summary
summary(m.v)
##  Family: gaussian 
##   Links: mu = identity; sigma = identity 
## Formula: v ~ diagnosis * phase + (1 | s) 
##    Data: df.lng (Number of observations: 198) 
##   Draws: 4 chains, each with iter = 3000; warmup = 1000; thin = 1;
##          total post-warmup draws = 8000
## 
## Multilevel Hyperparameters:
## ~s (Number of levels: 66) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     0.59      0.06     0.49     0.72 1.00     1552     3009
## 
## Regression Coefficients:
##                   Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept             2.31      0.08     2.16     2.47 1.01      912     1772
## diagnosis1           -0.27      0.11    -0.48    -0.07 1.01      883     1820
## diagnosis2           -0.05      0.11    -0.26     0.16 1.01      900     1974
## phase1               -0.03      0.03    -0.09     0.04 1.00     6249     5825
## phase2                0.04      0.03    -0.02     0.11 1.00     6359     5664
## diagnosis1:phase1     0.08      0.05    -0.01     0.17 1.00     5341     5891
## diagnosis2:phase1    -0.01      0.05    -0.11     0.08 1.00     5088     5845
## diagnosis1:phase2     0.01      0.05    -0.08     0.11 1.00     5421     5318
## diagnosis2:phase2    -0.03      0.05    -0.12     0.06 1.00     4940     5486
## 
## Further Distributional Parameters:
##       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma     0.33      0.02     0.29     0.38 1.00     4744     5283
## 
## Draws were sampled using sample(hmc). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
# get the estimates and compute group comparisons
# Under sum coding: ADHD = Int + d1, BOTH = Int + d2, COMP = Int - d1 - d2,
# and the postvolatile phase coefficient equals -(phase1 + phase2).
df.m.v = post.draws %>% 
  select(starts_with("b_")) %>%
  mutate(
    ADHD   = b_Intercept + b_diagnosis1,
    BOTH   = b_Intercept + b_diagnosis2,
    COMP   = b_Intercept - b_diagnosis1 - b_diagnosis2,
    b_COMP = - b_diagnosis1 - b_diagnosis2,
    b_postvolatile = - b_phase1 - b_phase2,
    `e1_ADHDvCOMP` = ADHD - COMP,
    `e2_BOTHvCOMP` = BOTH - COMP,
    `e3_ADHDvBOTH` = ADHD - BOTH,
    # volatile minus the mean of the two stable phases; algebraically 1.5*phase2
    `e4_VOLvSTBL`  = (b_Intercept + b_phase2) - ((b_Intercept + b_phase1) + (b_Intercept + b_postvolatile))/2,
    # COMP volatility effect minus the mean ADHD/BOTH volatility effect
    `e5_diagVOLvSTBL` = -2.25*(`b_diagnosis1:phase2` + `b_diagnosis2:phase2`)
  )

# plot the posterior distributions of the regression coefficients,
# colouring a coefficient "credible" when its 95% quantile interval excludes 0
df.m.v %>%
  select(starts_with("b_")) %>%
  pivot_longer(cols = starts_with("b_"), names_to = "coef", values_to = "estimate") %>%
  subset(!startsWith(coef, "b_Int")) %>%
  mutate(
    # strip the "b_" prefix and translate coded names into readable labels
    coef = substr(coef, 3, nchar(coef)),
    coef = str_replace_all(coef, ":", " x "),
    coef = str_replace_all(coef, "diagnosis1", "ADHD"),
    coef = str_replace_all(coef, "diagnosis2", "BOTH"),
    coef = str_replace_all(coef, "phase1", "prevolatile"),
    coef = str_replace_all(coef, "phase2", "volatile"),
    coef = fct_reorder(coef, desc(estimate))
  ) %>% 
  group_by(coef) %>%
  mutate(
    cred = case_when(
      (mean(estimate) < 0 & quantile(estimate, probs = 0.975) < 0) |
        (mean(estimate) > 0 & quantile(estimate, probs = 0.025) > 0) ~ "credible",
      T ~ "not credible"
    )
  ) %>% ungroup() %>%
  ggplot(aes(x = estimate, y = coef, fill = cred)) +
  geom_vline(xintercept = 0, linetype = 'dashed') +
  ggdist::stat_halfeye(alpha = 0.7) + ylab(NULL) + theme_bw() +
  scale_fill_manual(values = c(c_dark, c_light)) + theme(legend.position = "none")

# get the design matrix to figure out how to set the contrasts
# (inspected interactively to derive the hypothesis weights used below;
# df.des is not referenced again in this script)
df.des = cbind(df.lng %>% select(diagnosis, phase), 
               model.matrix(~ diagnosis * phase, data = df.lng)) %>%
  ungroup() %>% distinct()

# COMP > ADHD  (under sum coding COMP - ADHD = -(2*diagnosis1 + diagnosis2))
e1 = hypothesis(m.v, "0 > 2*diagnosis1 + diagnosis2", alpha = 0.025)
e1$hypothesis
##                          Hypothesis  Estimate Est.Error  CI.Lower  CI.Upper
## 1 (0)-(2*diagnosis1+diagnosis2) > 0 0.5876208 0.1809524 0.2371597 0.9484197
##   Evid.Ratio Post.Prob Star
## 1   1332.333   0.99925    *
# COMP > BOTH  (COMP - BOTH = -(diagnosis1 + 2*diagnosis2))
e2  = hypothesis(m.v, "0 > diagnosis1 + 2*diagnosis2", alpha = 0.025)
e2$hypothesis
##                          Hypothesis Estimate Est.Error   CI.Lower  CI.Upper
## 1 (0)-(diagnosis1+2*diagnosis2) > 0 0.364679 0.1824531 0.01520972 0.7326949
##   Evid.Ratio Post.Prob Star
## 1   48.68944  0.979875    *
# ADHD < BOTH
e3  = hypothesis(m.v, "diagnosis1 < diagnosis2", alpha = 0.025)
e3$hypothesis
##                      Hypothesis   Estimate Est.Error   CI.Lower  CI.Upper
## 1 (diagnosis1)-(diagnosis2) < 0 -0.2229418 0.1833993 -0.5794976 0.1443912
##   Evid.Ratio Post.Prob Star
## 1   8.070295   0.88975
# volatile versus stable phases
e4  = hypothesis(m.v, "1.5*phase2 > 0", alpha = 0.025)   # volatile - mean(prevolatile, postvolatile)
e4
## Hypothesis Tests for class b:
##         Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio Post.Prob
## 1 (1.5*phase2) > 0     0.06      0.05    -0.04     0.16       8.62       0.9
##   Star
## 1     
## ---
## 'CI': 95%-CI for one-sided and 97.5%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 97.5%;
## for two-sided hypotheses, the value tested against lies outside the 97.5%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.
# differences in volatility effect depending on ADHD diagnosis
e5 = hypothesis(m.v, "-2.25*(diagnosis1:phase2 + diagnosis2:phase2) > 0") # COMP(vol-stable) - mean(ADHD(vol-stable), BOTH(vol-stable))
e5
## Hypothesis Tests for class b:
##                 Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio
## 1 (-2.25*(diagnosis... > 0     0.04      0.11    -0.14     0.21        1.8
##   Post.Prob Star
## 1      0.64     
## ---
## 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 95%;
## for two-sided hypotheses, the value tested against lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.
# equivalence test of all contrasts against the model's default ROPE
equivalence_test(df.m.v %>% select(starts_with("e")), 
                 range = rope_range(m.v))
## # Test for Practical Equivalence
## 
##   ROPE: [-0.07 0.07]
## 
## Parameter       |        H0 | inside ROPE |        95% HDI
## ----------------------------------------------------------
## e1_ADHDvCOMP    |  Rejected |      0.00 % | [-0.95, -0.24]
## e2_BOTHvCOMP    | Undecided |      2.72 % | [-0.73, -0.02]
## e3_ADHDvBOTH    | Undecided |     15.57 % |  [-0.58, 0.14]
## e4_VOLvSTBL     | Undecided |     57.29 % |  [-0.04, 0.16]
## e5_diagVOLvSTBL | Undecided |     49.25 % |  [-0.17, 0.25]
# calculate effect sizes: each contrast standardised by the total SD
# (square root of the summed random-effect and residual variances)
df.effect = post.draws %>%
  mutate(across(starts_with("sd")|starts_with("sigma"), ~.^2)) %>%
  mutate(
    sumvar = sqrt(rowSums(select(., starts_with("sd")|starts_with("sigma")))),
    e1 = -(2*`b_diagnosis1` + `b_diagnosis2`) / sumvar,
    e2 = -(`b_diagnosis1` + 2*`b_diagnosis2`) / sumvar,
    e3 = (-`b_diagnosis1` + `b_diagnosis2`) / sumvar,
    e4 = 1.5*b_phase2 / sumvar,
    e5 = -2.25*(`b_diagnosis1:phase2` + `b_diagnosis2:phase2`) / sumvar
  )

# tabulate mean standardised effects with CIs and a verbal interpretation
kable(df.effect %>% select(starts_with("e")|starts_with("h")) %>%
        pivot_longer(cols = everything(), values_to = "estimate") %>%
        group_by(name) %>%
        summarise(
          ci.lo = lower_ci(estimate),
          mean  = mean(estimate),
          ci.hi = upper_ci(estimate),
          interpret = interpret_cohens_d(mean)
        ), digits = 3
)
name ci.lo mean ci.hi interpret
e1 0.335 0.872 1.414 large
e2 0.022 0.541 1.088 medium
e3 -0.210 0.331 0.861 small
e4 -0.054 0.093 0.245 very small
e5 -0.248 0.057 0.383 very small

5.4 Check influence of outlier

# flag drift rates above 6 as outliers by setting them to NA
df.lng = df.lng %>% 
  mutate(
    v.clean = if_else(v > 6, NA, v)
  )

# fit the final model without the outlier
# NOTE(review): drop_na() removes rows with NA in ANY column, not just
# v.clean -- assumed only v.clean contains NA here; confirm if in doubt
m.v2 = brm(v.clean ~ diagnosis * phase + (1 | s), 
           seed = 1234, prior = priors, 
           data = df.lng %>% drop_na(),
           iter = iter, warmup = warm,
           backend = "cmdstanr", threads = threading(2),
           file = file.path(brms_dir, "m_ddm_v2"),
           save_pars = save_pars(all = TRUE)
           )

# compare both models side by side
tab_model(m.v, m.v2)
  v v.clean
Predictors Estimates CI (95%) Estimates CI (95%)
Intercept 2.31 2.16 – 2.47 2.31 2.17 – 2.44
diagnosis1 -0.27 -0.48 – -0.07 -0.26 -0.46 – -0.06
diagnosis2 -0.05 -0.26 – 0.16 -0.03 -0.21 – 0.16
phase1 -0.03 -0.09 – 0.04 -0.01 -0.07 – 0.04
phase2 0.04 -0.02 – 0.11 0.05 -0.00 – 0.11
diagnosis1:phase1 0.08 -0.01 – 0.17 0.07 -0.01 – 0.15
diagnosis2:phase1 -0.01 -0.11 – 0.08 -0.03 -0.10 – 0.05
diagnosis1:phase2 0.01 -0.08 – 0.11 -0.00 -0.08 – 0.08
diagnosis2:phase2 -0.03 -0.12 – 0.06 -0.04 -0.12 – 0.04
Random Effects
σ2 0.32 0.29
τ00 0.18 0.14
ICC 0.64 0.67
N 66 s 66 s
Observations 198 197
Marginal R2 / Conditional R2 0.138 / 0.785 0.138 / 0.817
# repeat the three group hypotheses on the outlier-cleaned model
# COMP > ADHD
e1 = hypothesis(m.v2, "0 > 2*diagnosis1 + diagnosis2", alpha = 0.025)
e1$hypothesis
##                          Hypothesis  Estimate Est.Error  CI.Lower  CI.Upper
## 1 (0)-(2*diagnosis1+diagnosis2) > 0 0.5445833 0.1816275 0.1865946 0.8933183
##   Evid.Ratio Post.Prob Star
## 1   726.2727  0.998625    *
# COMP > BOTH
e2  = hypothesis(m.v2, "0 > diagnosis1 + 2*diagnosis2", alpha = 0.025)
e2$hypothesis
##                          Hypothesis  Estimate Est.Error    CI.Lower  CI.Upper
## 1 (0)-(diagnosis1+2*diagnosis2) > 0 0.3091633 0.1710532 -0.03789075 0.6385828
##   Evid.Ratio Post.Prob Star
## 1   23.31611  0.958875
# ADHD < BOTH
e3  = hypothesis(m.v2, "diagnosis1 < diagnosis2", alpha = 0.025)
e3$hypothesis
##                      Hypothesis   Estimate Est.Error   CI.Lower CI.Upper
## 1 (diagnosis1)-(diagnosis2) < 0 -0.2354199 0.1711057 -0.5644204 0.100727
##   Evid.Ratio Post.Prob Star
## 1   11.28879  0.918625

6 Explore influence of medication on drift rate

6.1 Model setup

# restrict to the two clinical groups; keep all phase levels
df.lng.sel = df.lng %>%
  filter(diagnosis != "COMP") %>% droplevels(except = "phase")

# set the contrasts (phase keeps the sum coding set earlier)
contrasts(df.lng.sel$phase)
##              [,1] [,2]
## prevolatile     1    0
## volatile        0    1
## postvolatile   -1   -1
# reorder the rows of contr.sum(2) so that FALSE = -1 and TRUE = +1
contrasts(df.lng.sel$adhd.meds.bin) = contr.sum(2)[c(2,1)]
contrasts(df.lng.sel$adhd.meds.bin)
##       [,1]
## FALSE   -1
## TRUE     1
# update formula: medication status replaces diagnosis as predictor
f.v = brms::bf(v ~ adhd.meds.bin * phase + (1|s))

6.2 Posterior predictive checks

As the next step, we fit the model, check whether there are divergences or rhat issues, and then check whether the chains have converged.

# fit the group model ('file =' caches the fit; reruns load it from disk)
m.v3 = brm(f.v, seed = 4664,
            df.lng.sel, prior = priors,
            iter = iter, warmup = warm,
            backend = "cmdstanr", threads = threading(2),
            file = file.path(brms_dir, "m_ddm_v3"),
            save_pars = save_pars(all = TRUE)
            )
# sampler diagnostics: divergences, tree depth saturation, E-BFMI
rstan::check_hmc_diagnostics(m.v3$fit)
## 
## Divergences:
## 0 of 8000 iterations ended with a divergence.
## 
## Tree depth:
## 0 of 8000 iterations saturated the maximum tree depth of 10.
## 
## Energy:
## E-BFMI indicated no pathological behavior.
# check that rhats are below 1.01
sum(brms::rhat(m.v3) >= 1.01, na.rm = T)
## [1] 0
# check the trace plots for the regression coefficients
post.draws = as_draws_df(m.v3)
mcmc_trace(post.draws, regex_pars = "^b_",
           facet_args = list(ncol = 3)) +
  scale_x_continuous(breaks=scales::pretty_breaks(n = 3)) +
  scale_y_continuous(breaks=scales::pretty_breaks(n = 3))
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.

This model shows no pathological E-BFMI behaviour, no divergent samples, and no rhats greater than or equal to 1.01. Therefore, we go ahead and perform our posterior predictive checks.

# get posterior predictions (nsim draws from the posterior predictive)
post.pred = posterior_predict(m.v3, ndraws = nsim)

# check the fit of the predicted data compared to the real data
p1 = pp_check(m.v3, ndraws = nsim) + 
  theme_bw() + theme(legend.position = "none")

# distributions of means compared to the real values per group
p2 = ppc_stat_grouped(df.lng.sel$v, post.pred, df.lng.sel$adhd.meds.bin) + 
  theme_bw() + theme(legend.position = "none")

# ... and per phase
p3 = ppc_stat_grouped(df.lng.sel$v, post.pred, df.lng.sel$phase) + 
  theme_bw() + theme(legend.position = "none")

p = ggarrange(p1, p2, p3, #heights = c(1, 2, 1),
          ncol = 1, labels = "AUTO")
annotate_figure(p, top = text_grob("Posterior predictive checks", 
                                   face = "bold", size = 14))

6.3 Inferences

Now that we are convinced that we can trust our model, we have a look at its estimates and use the hypothesis function to assess our hypotheses and perform exploratory tests.

# print a summary
summary(m.v3)
##  Family: gaussian 
##   Links: mu = identity; sigma = identity 
## Formula: v ~ adhd.meds.bin * phase + (1 | s) 
##    Data: df.lng.sel (Number of observations: 132) 
##   Draws: 4 chains, each with iter = 3000; warmup = 1000; thin = 1;
##          total post-warmup draws = 8000
## 
## Multilevel Hyperparameters:
## ~s (Number of levels: 44) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     0.55      0.07     0.44     0.70 1.00     1552     2718
## 
## Regression Coefficients:
##                       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## Intercept                 2.14      0.09     1.97     2.32 1.00     1049
## adhd.meds.bin1            0.06      0.09    -0.11     0.22 1.00     1057
## phase1                    0.01      0.03    -0.06     0.07 1.00     6948
## phase2                    0.03      0.03    -0.03     0.10 1.00     7233
## adhd.meds.bin1:phase1     0.01      0.03    -0.06     0.08 1.00     7207
## adhd.meds.bin1:phase2    -0.00      0.03    -0.07     0.06 1.00     7282
##                       Tail_ESS
## Intercept                 2335
## adhd.meds.bin1            1984
## phase1                    6272
## phase2                    6017
## adhd.meds.bin1:phase1     5843
## adhd.meds.bin1:phase2     5958
## 
## Further Distributional Parameters:
##       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma     0.28      0.02     0.24     0.32 1.00     4994     5476
## 
## Draws were sampled using sample(hmc). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
# get the estimates and plot the posterior distributions
# ("credible" when the 95% quantile interval excludes zero)
post.draws %>% 
  select(starts_with("b_"), -b_Intercept) %>%
  pivot_longer(cols = everything(), names_to = "coef", values_to = "estimate") %>%
  mutate(
    coef = fct_reorder(coef, desc(estimate))
  ) %>% 
  group_by(coef) %>%
  mutate(
    cred = case_when(
      (mean(estimate) < 0 & quantile(estimate, probs = 0.975) < 0) |
        (mean(estimate) > 0 & quantile(estimate, probs = 0.025) > 0) ~ "credible",
      T ~ "not credible"
    )
  ) %>% ungroup() %>%
  ggplot(aes(x = estimate, y = coef, fill = cred)) +
  geom_vline(xintercept = 0, linetype = 'dashed') +
  ggdist::stat_halfeye(alpha = 0.7) + ylab(NULL) + theme_bw() +
  scale_fill_manual(values =  c(c_dark, "not credible" = c_light)) + theme(legend.position = "none")

7 Plots

# rain cloud plot of participant drift rates per phase and diagnosis group
p = df.lng %>%
  mutate(
    diagnosis = if_else(diagnosis == "BOTH", "ADHD+ASD", diagnosis)
  ) %>%
  ggplot(aes(phase, v, fill = diagnosis, colour = diagnosis)) + #
  geom_rain(rain.side = 'r',
boxplot.args = list(color = "black", outlier.shape = NA, show.legend = FALSE, alpha = .8),
violin.args = list(color = "black", outlier.shape = NA, alpha = .8),
boxplot.args.pos = list(
  position = ggpp::position_dodgenudge(x = 0, width = 0.3), width = 0.3
),
point.args = list(show.legend = FALSE, alpha = .5),
violin.args.pos = list(
  width = 0.6, position = position_nudge(x = 0.16)),
point.args.pos = list(position = ggpp::position_dodgenudge(x = -0.25, width = 0.1))) +
  scale_fill_manual(values = col.grp) +
  scale_color_manual(values = col.grp) +
  labs(x = "", y = "arbitrary unit") +
  theme_bw() + 
  theme(legend.position.inside = c(0.2, 0.8),
        legend.position = "inside", plot.title = element_text(hjust = 0.5), 
        legend.direction = "horizontal", text = element_text(size = 15),
        legend.title = element_blank())

annotate_figure(p, top = text_grob("Participant-specific drift rates", 
                                   face = "bold", size = 14))

# NOTE(review): ggsave() without 'plot =' saves the last *ggplot*; verify
# this captures the annotated figure rather than the bare plot p
ggsave(filename = file.path("plots", "FigDDM.svg"), 
       units = "cm", width = 27, height = 9)

# include medication into the plot: drift rates per diagnosis group split by
# medication status, faceted by phase
df.lng %>%
  mutate(
    diagnosis = if_else(diagnosis == "BOTH", "ADHD+ASD", diagnosis)
  ) %>%
  ggplot(aes(diagnosis, v, fill = adhd.meds.bin, colour = adhd.meds.bin)) + #
  geom_rain(rain.side = 'r',
boxplot.args = list(color = "black", outlier.shape = NA, show.legend = FALSE, alpha = .8),
violin.args = list(color = "black", outlier.shape = NA, alpha = .8),
boxplot.args.pos = list(
  position = ggpp::position_dodgenudge(x = 0, width = 0.3), width = 0.3
),
point.args = list(show.legend = FALSE, alpha = .5),
violin.args.pos = list(
  width = 0.6, position = position_nudge(x = 0.16)),
point.args.pos = list(position = ggpp::position_dodgenudge(x = -0.25, width = 0.1))) +
  scale_fill_manual(values = col.cnd) +
  scale_color_manual(values = col.cnd) +
  labs(x = "", y = "arbitrary unit") +
  facet_wrap(. ~ phase) + 
  theme_bw() + 
  theme(legend.position.inside = c(0.2, 0.8),
        legend.position = "inside", plot.title = element_text(hjust = 0.5), 
        legend.direction = "horizontal", text = element_text(size = 15),
        legend.title = element_blank())

# other parameters: rain clouds for the non-drift DDM parameters (a, z, t0),
# relabelled with descriptive names and faceted per parameter
df.part %>% select(!starts_with("v")) %>%
  pivot_longer(cols = where(is.numeric)) %>%
  mutate(
    name = factor(case_match(name,
                           "a" ~ "boundary separation",
                           "z" ~ "starting point",
                           "t0" ~ "non-decision time"
                           ))
  ) %>%
  ggplot(aes(1, value, fill = diagnosis, colour = diagnosis)) + #
  geom_rain(rain.side = 'r',
boxplot.args = list(color = "black", outlier.shape = NA, show.legend = FALSE, alpha = .8),
violin.args = list(color = "black", outlier.shape = NA, alpha = .8),
boxplot.args.pos = list(
  position = ggpp::position_dodgenudge(x = 0, width = 0.3), width = 0.3
),
point.args = list(show.legend = FALSE, alpha = .5),
violin.args.pos = list(
  width = 0.6, position = position_nudge(x = 0.16)),
point.args.pos = list(position = ggpp::position_dodgenudge(x = -0.25, width = 0.1))) +
  scale_fill_manual(values = col.grp) +
  scale_color_manual(values = col.grp) +
  labs(title = "Other DDM parameters", x = "", y = "") +
  facet_wrap(. ~ name, scales = "free") + 
  theme_bw() + 
  theme(legend.position = "bottom", plot.title = element_text(hjust = 0.5), 
        legend.direction = "horizontal", text = element_text(size = 15),
        axis.text.x = element_blank(), axis.ticks.x = element_blank())